#!/bin/bash
#
# Builds PolyPhen-2 input from a VCF file and runs the PolyPhen-2 scripts on it.
#
# Usage: <this script> <input.vcf>
#
# Intermediate files are written to ./pph2_tmp; the final result file is
# ./1.pph_score3.txt in the directory the script was started from.

# Abort on the first failing command, including a failing stage inside a
# pipeline (plain -e only looks at the last stage of a pipeline).
set -eo pipefail

# Root directory of the installed third-party tools.
tools_path=/mnt/ilustre/app/medical/tools

# Exactly one argument is required: the VCF file to process.
# Diagnostics go to stderr so they never mix with regular output.
if [[ -z "$1" ]]; then
	echo "$0 <input.vcf>" >&2
	exit 1
fi

# Fail early, before any working directory is created, when the input is
# missing. The path is checked relative to the start directory.
if [[ ! -f "$1" ]]; then
	echo "$0: input VCF not found: $1" >&2
	exit 1
fi


# Log file, kept in the directory the script was started from. The path is
# made absolute because the script changes into the working directory below
# and keeps appending to "$log" afterwards; a relative path would silently
# start a second log inside the working directory.
log="${PWD%/}/.pph2.log"
if [[ ! -e "$log" ]]; then
	: > "$log"
fi

# Working directory for intermediate files. -p keeps a re-run from aborting
# when the directory is left over from an earlier run.
dir_name=pph2_tmp
mkdir -p -- "$dir_name"
echo "$dir_name" maked >> "$log" 2>&1
echo tep files will be located in "$dir_name" ... >> "$log" 2>&1
cd -- "$dir_name"




# Locations of the individual tools, all derived from ${tools_path}.

# PolyPhen-2 scripts and the local helper scripts.
pph_path="${tools_path}/polyphen-2.2.2/bin"
script_path="${tools_path}/script"

# SIFT installation with its binaries and database directory.
sift_path="${tools_path}/sift4.0.3b"
sift_data="${sift_path}/db"
sift_tools="${sift_path}/bin"

# snpEff installation and the SnpSift jar shipped with it.
snpeff_path="${tools_path}/snpEff"
snpsift="${snpeff_path}/SnpSift.jar"

# Heap limit handed to the JVM through -Xmx; recorded in the log.
java_memory=4g
printf 'java memory: %s\n' "$java_memory" >> "$log" 2>&1

# Resolve the input VCF as seen from the working directory. The script has
# already changed into it, so a relative argument lives one level up, while an
# absolute argument must be used unchanged.
case "$1" in
	/*) input_vcf=$1 ;;
	*) input_vcf=../$1 ;;
esac

echo
# Pull the position columns (CHROM, POS) out of the VCF.
echo extractField "$input_vcf" CHROM POS >> "$log" 2>&1
java -Xmx"$java_memory" -jar "$snpsift" extractField "$input_vcf" CHROM POS > 2.txt

# Pull the allele columns (REF, ALT) out of the VCF.
echo extractField "$input_vcf" REF ALT >> "$log" 2>&1
java -Xmx"$java_memory" -jar "$snpsift" extractField "$input_vcf" REF ALT > 1.ref.alt.txt

# Chromosome names get a "chr" prefix on the assumption that GRCh37 and hg19
# are close enough to be used interchangeably here.
# Each line becomes "chr<CHROM>:<POS>".
awk '{sub(/^/, "chr"); sub(/\t/, ":"); print}' 2.txt > 1.chr_pos.txt
# Each line becomes "<REF>/<ALT>".
awk '{sub(/\t/, "/"); print}' 1.ref.alt.txt > 1.ref_alt.txt 2>> "$log"
# Join both columns side by side and drop the first (header) line.
paste 1.chr_pos.txt 1.ref_alt.txt | awk 'NR>1 {print}' > ./1.pph.txt


# Step 1: run mapsnps.pl on the variant list. Its stdout is kept in
# snps.features, its stderr in .mapsnps.log; the file named by -y
# (subs.pph.input) is the input of the next step.
perl "${pph_path}/mapsnps.pl" \
	-mU \
	-y subs.pph.input \
	1.pph.txt \
	1> snps.features \
	2> .mapsnps.log

# Step 2: run_pph.pl turns the substitutions into pph.features
# (stderr goes to .pph.log).
perl "${pph_path}/run_pph.pl" \
	subs.pph.input \
	1> pph.features \
	2> .pph.log

# Step 3: run_weka.pl produces the predictions from the features.
perl "${pph_path}/run_weka.pl" \
	pph.features \
	1> 1.pph.predictions

# Reformat the predictions.
# NOTE(review): 1.pph.pos.txt and 1.pph.score.txt are presumably written by
# this helper script - confirm against pph_res_format1_0520.pl.
perl "${script_path}/pph_res_format1_0520.pl" 1.pph.predictions

# Keep only the part before the first "|" of every position line.
# awk reads the file directly so that a missing file stops the script instead
# of silently producing an empty result.
awk -F "|" '{print $1}' 1.pph.pos.txt > 1.pph_pos.txt

# Put position and score side by side.
paste 1.pph_pos.txt 1.pph.score.txt > 1.pph_score.txt

# NOTE(review): 1.pph_score2.txt is presumably written by this helper script -
# confirm against pph_res_format2.pl.
perl "${script_path}/pph_res_format2.pl" 1.pph.txt 1.pph_score.txt

# Final result: third column of 1.pph_score2.txt, written to the directory
# the script was started from.
awk '{print $3}' 1.pph_score2.txt > ../1.pph_score3.txt

